{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "91042c76-b05a-43f6-a3ee-7fb415c011b2",
   "metadata": {},
   "source": [
    "执行命令需要在同一行命令中，先source环境名(base,python27,python36,python37,python38,python39,python310,cube-studio)才能pip安装到指定环境，如果不知道有哪些虚拟环境，可以conda info --envs查看"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5834674a-ac83-4c20-986f-602af81fcd65",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n",
      "Requirement already satisfied: pandas in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (2.0.1)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: numpy>=1.20.3 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (1.24.3)\n",
      "Requirement already satisfied: pytz>=2020.1 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: six>=1.5 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!source activate cube-studio && pip install pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e43791c2-5d05-4e0c-926e-7baa95519ae0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO: Pandarallel will run on 10 workers.\n",
      "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n"
     ]
    }
   ],
   "source": [
    "import json, os, time, shutil, pandas\n",
    "\n",
    "from cubestudio.request.model_client import Client,init\n",
    "from cubestudio.dataset.dataset import Dataset\n",
    "from pandarallel import pandarallel\n",
    "\n",
    "# Initialization\n",
    "pandarallel.initialize(nb_workers=10)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1e690c83-e6e7-4f14-8b8d-0dcc0f666d82",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "初始化验证成功\n"
     ]
    }
   ],
   "source": [
    "# 初始化客户端\n",
    "HOST = 'http://kubeflow-dashboard.infra'\n",
    "token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJjdWJlLXN0dWRpbyIsInN1YiI6ImFkbWluIn0.z4XJRqUI4v39MUYDUKdIuQsP5QlRENyVkQIp6a-1fb0'\n",
    "username='admin'\n",
    "init(host=HOST,username=username,token=token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "fe41d6fc-7701-4f21-b97e-7228651fa19f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 生成一个加密秘钥\n",
    "# from cryptography.fernet import Fernet\n",
    "# key=Fernet.generate_key()\n",
    "# print(key)\n",
    "key = b'aViHLsGcYgmzMJrS98N2yRD3oTPMZf5JcZvKzr47f6E='"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "91d6b0d6-afcf-4b4f-a16a-6aee76641d64",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 定义一个数据集\n",
    "dataset = Client(Dataset).one(name=\"coco\")\n",
    "if not dataset:\n",
    "    dataset = Client(Dataset).add(name='coco', version='v2014', label='coco未标注数据集', describe='来自于2014年数据，未标注的coco数据集',icon='https://pic2.zhimg.com/80/v2-399df41d8562f8f09b98d288b97c8f8d_1440w.webp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b79da363-a82b-439e-9ea4-176a0d9abf0e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准备上传本地数据 ['coco.zip.crypt']\n",
      "数据集上传完成 ['coco.zip.crypt']\n"
     ]
    }
   ],
   "source": [
    "# 上传数据集\n",
    "features = json.dumps(json.load(open('vision/coco/data.json')),indent=4,ensure_ascii=False)\n",
    "dataset = dataset.update(path='',features=features)\n",
    "# 压缩\n",
    "dataset.compress('coco.zip','vision/coco')\n",
    "# 加密\n",
    "dataset.encrypt('coco.zip',\"coco.zip.crypt\",key)\n",
    "# 上传\n",
    "dataset.upload('coco.zip.crypt',partition='20230201')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "43627141-cea4-4e03-a80b-7a624e9685f0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准备下载数据到 /mnt/admin/pipeline/example/dataset\n",
      "['http://kubeflow-dashboard.infra/static/dataset/coco/2014/coco.zip.crypt']\n",
      "begin donwload /mnt/admin/pipeline/example/dataset/coco.zip.crypt from http://kubeflow-dashboard.infra/static/dataset/coco/2014/coco.zip.crypt\n",
      "下载数据完成 /mnt/admin/pipeline/example/dataset\n"
     ]
    }
   ],
   "source": [
    "# 下载数据集\n",
    "os.remove('coco.zip.crypt')  if os.path.exists('coco.zip.crypt') else ''\n",
    "os.remove('coco.zip')  if os.path.exists('coco.zip') else ''\n",
    "shutil.rmtree('coco')  if os.path.exists('coco') else ''\n",
    "dataset.download(partition='20230201')\n",
    "dataset.decrypt(\"coco.zip.crypt\",'coco.zip',key)\n",
    "dataset.decompress('coco.zip','coco')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0f2cd280-e8db-4b15-b612-596ce7463f2d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 685 entries, 0 to 684\n",
      "Data columns (total 8 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   id           685 non-null    object \n",
      " 1   image        685 non-null    object \n",
      " 2   class_name   685 non-null    object \n",
      " 3   class_index  685 non-null    int64  \n",
      " 4   x            685 non-null    float64\n",
      " 5   y            685 non-null    float64\n",
      " 6   width        685 non-null    float64\n",
      " 7   height       685 non-null    float64\n",
      "dtypes: float64(4), int64(1), object(3)\n",
      "memory usage: 42.9+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>class_index</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>width</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>685.000000</td>\n",
       "      <td>685.000000</td>\n",
       "      <td>685.000000</td>\n",
       "      <td>685.000000</td>\n",
       "      <td>685.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>24.878832</td>\n",
       "      <td>0.500747</td>\n",
       "      <td>0.524421</td>\n",
       "      <td>0.197138</td>\n",
       "      <td>0.232435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>23.414175</td>\n",
       "      <td>0.240506</td>\n",
       "      <td>0.234180</td>\n",
       "      <td>0.239847</td>\n",
       "      <td>0.244256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.010890</td>\n",
       "      <td>0.018115</td>\n",
       "      <td>0.003922</td>\n",
       "      <td>0.004860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.316008</td>\n",
       "      <td>0.394062</td>\n",
       "      <td>0.038281</td>\n",
       "      <td>0.055323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>20.000000</td>\n",
       "      <td>0.510065</td>\n",
       "      <td>0.527486</td>\n",
       "      <td>0.104874</td>\n",
       "      <td>0.118542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>46.000000</td>\n",
       "      <td>0.675977</td>\n",
       "      <td>0.695495</td>\n",
       "      <td>0.241203</td>\n",
       "      <td>0.337083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>79.000000</td>\n",
       "      <td>0.992520</td>\n",
       "      <td>0.988594</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.997410</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       class_index           x           y       width      height\n",
       "count   685.000000  685.000000  685.000000  685.000000  685.000000\n",
       "mean     24.878832    0.500747    0.524421    0.197138    0.232435\n",
       "std      23.414175    0.240506    0.234180    0.239847    0.244256\n",
       "min       0.000000    0.010890    0.018115    0.003922    0.004860\n",
       "25%       0.000000    0.316008    0.394062    0.038281    0.055323\n",
       "50%      20.000000    0.510065    0.527486    0.104874    0.118542\n",
       "75%      46.000000    0.675977    0.695495    0.241203    0.337083\n",
       "max      79.000000    0.992520    0.988594    1.000000    0.997410"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas\n",
    "# 数据集加载\n",
    "data = pandas.read_csv('coco/data.csv')\n",
    "# 查看数据集的基本信息：\n",
    "data.info()\n",
    "# 查看数据集的统计描述：\n",
    "data.describe()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cube-studio",
   "language": "python",
   "name": "cube-studio"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
